* Start from a clean session and disable output paging so the script runs unattended
clear all
set more off
* Close any log that is still open; capture suppresses the error when none is open
capture log close
* Move to the project root, then open (and overwrite) this program's log file
cd "${master_dir}"
log using "${log_dir}/5-PlaceboDays.log", replace
***************************************************************************************************
* 
* Program: 5-PlaceboDays.do
* Purpose: Create Placebo Days Figures
* Sections:
*     1. Placebo Day Wait Time Histograms & Main Specification Coefficients
* Files Used:
*     1. Pings_all_days.dta
*     2. likelyvoters.dta
*     3. IdentifiedRegularPingers.dta
*     4. block_group_data_2017.dta
* Files Created:
*     1. 5-PlaceboDays.log
*     2. Figure A2: app_A_f2_1.png - app_A_f2_15.png
*     3. Figure 1: f1c_filtered.png
*     4. Figure A8: app_A_f8_placebo_coef.png
*     5. Intermediate Outputs: app_A_f8_coefficients.tex, app_A_f8_output.dta
*
***************************************************************************************************

***************************************************************************************************
*  1. Placebo Day Wait Time Histograms & Main Specification Coefficients
***************************************************************************************************

* Pre-allocate 1x15 result matrices, one column per day (November 1-15):
*   placebo_b  = coefficient on race_black     placebo_se = its standard error
*   placebo_v1 = number of observations left after all filters
*   placebo_v2 is allocated here but never filled in this file
foreach stat in b se v1 v2 {
    matrix define placebo_`stat' = J(1,15,.)
}

* Loop over the 15 days. X is the zero-padded day of the month (used in td() and in the
* day-specific likely-voter variable name); i is the matching column index 1..15.
local i = 0
foreach X in 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 {
    local i = `i'+1
    * Load main dataset before merging in filter variables from other constructed datasets
    * (only pings from this day that lie within 60 of a polling place, per Dist_to_PollingPlace_M)
    qui use PollingPlace_ID ID_11_16 Dist_to_PollingPlace_M day local_date_sec statefips ///
      Ping_in_ConvexHull Sec_Since_Last Sec_Till_Next_Ping gisjoin if day == td(`X'nov2016) & ///
      Dist_to_PollingPlace_M <= 60 using "${data_dir}/Pings_all_days.dta", clear

    * Merge in "Likely Voter" Person Identifier (Filter)
    * Devices without a match in likelyvoters.dta are treated as not likely voters (0)
    qui merge m:1 ID_11_16 using "$data_dir/likelyvoters.dta", keepusing(likelyvoter_v1_d`X')
    qui drop if _merge == 2
    replace likelyvoter_v1_d`X' = 0 if likelyvoter_v1_d`X' == .
    drop _merge
    qui rename likelyvoter_v1_d`X' likelyvoter_v1

    * Merge in the regular-pinger information (supplies uniquepinghours, used below)
    merge m:1 ID_11_16 using "$data_dir/IdentifiedRegularPingers.dta"
    drop if _merge == 2
    drop _merge

    * Filter flags: at least 12 unique ping hours, and at least one ping inside the convex hull
    gen consistentpinger = (uniquepinghours >= 12 & uniquepinghours != .)
    bysort ID_11_16 PollingPlace_ID: egen enteredpoll = max(Ping_in_ConvexHull)

    * First and last ping of each person at each polling place. The identifier is spelled
    * out in full (previously the abbreviation ID_11) so that the grouping does not depend
    * on variable abbreviation resolving uniquely after the merges above.
    * Lower bound = time between first and last ping. It is divided by 1000 here and by 60
    * below, so local_date_sec is presumably in milliseconds - TODO confirm.
    bys ID_11_16 PollingPlace_ID: egen double earliestping = min(local_date_sec)
    bys ID_11_16 PollingPlace_ID: egen double latestping = max(local_date_sec)
    gen double lowerbound = (latestping - earliestping)/1000

    * Gap between the first ping at the polling place and the ping before it
    gen double sec_before_earliest_ping1 = Sec_Since_Last if earliestping == local_date_sec
    bys ID_11_16 PollingPlace_ID: egen double sec_before_earliest_ping = max(sec_before_earliest_ping1)
    drop sec_before_earliest_ping1

    * Gap between the last ping at the polling place and the ping after it
    gen double sec_after_latest_ping1 = Sec_Till_Next_Ping if latestping == local_date_sec
    bys ID_11_16 PollingPlace_ID: egen double sec_after_latest_ping = max(sec_after_latest_ping1)
    drop sec_after_latest_ping1

    * Upper bound adds both surrounding gaps to the lower bound; both bounds are then
    * divided by 60 (the histogram below labels the result as minutes)
    gen double upperbound = lowerbound + sec_before_earliest_ping + sec_after_latest_ping
    replace upperbound = upperbound / 60
    replace lowerbound = lowerbound / 60

    * Wait time = midpoint between the lower and upper bound
    gen waittime = lowerbound + (upperbound - lowerbound)/2

    * Keep only upper bounds strictly between 1 and 120 (missing upper bounds are excluded)
    gen reasonablevalues = ((upperbound > 1 & upperbound != .) & (upperbound < (60*2)))

    gen hour_of_arrival = hh(earliestping)

    * Merge in block-group data (supplies race_black for the regression)
    sort gisjoin
    merge m:1 gisjoin using "$raw_dir/block_group_data_2017.dta"
    drop if _merge == 2
    drop _merge

    * Keep one observation per person, then apply all sample filters.
    * The person identifier is spelled out in full (previously the abbreviation ID).
    * NOTE(review): egen tag() may re-sort the data internally, so the gsort on upperbound
    * may not determine which observation per person is kept - confirm.
    gsort -upperbound
    egen tag_ID = tag(ID_11_16)

    keep if tag_ID == 1
    keep if likelyvoter_v1 == 1 & enteredpoll == 1 & consistentpinger == 1 & reasonablevalues == 1

    * Run the primary regression (standard errors clustered by polling place)
    disp "November `X', 2016"
    qui reg waittime race_black, cl(PollingPlace_ID)
    disp _b[race_black]
    qui estimates store D`i'
    qui matrix placebo_b[1,`i'] = _b[race_black]
    qui matrix placebo_se[1,`i'] = _se[race_black]
    sum waittime if e(sample)
    qui estadd scalar DepVarMean = r(mean)

    * Save the count of voters on this day using likely voter filter v1
    qui count
    qui matrix placebo_v1[1,`i'] = r(N)

    * Number of unique polling places, and average "race_black" of the sample
    * (stored in globals because they are shown in the histogram legend below)
    unique PollingPlace_ID
    global polls: di %9.0fc `r(unique)'
    global N: di %9.0fc `r(N)'
    sum race_black, d
    global fracblack: di %9.2fc `r(mean)'

    * Appendix Figure A2: Overall Wait Time Histogram for each Placebo Day
    * The saving() path is quoted so that a results directory containing spaces still works
    histogram waittime, width(1.5) frequency color(gray%50) ///
      lcolor(gray) xtitle("Wait Time in Minutes") ///
      ylab(,nogrid format(%16.0gc)) ytitle("Frequency") xlab(0(10)120, format(%16.0gc)) ///
      title("Histogram: Wait Time on November `i'") ///
      legend(on order(- "Unique Polling Places: $polls" ///
      "Observations:              $N" "Avg Fraction Black:    $fracblack") pos(2) ring(0)) ///
      graphregion(fcolor(white) lcolor(white)) ///
      saving("$result_dir/app_A_f2_`i'.gph", replace)
    graph export "$result_dir/app_A_f2_`i'.png",  replace
}

* Export the race_black coefficient from each day's stored regression (D1-D15, one column
* per day) to a table. Column titles run Nov1-Nov15 so that each matches its day
* (the first column was previously mislabeled Nov2). The output path is quoted so that a
* results directory containing spaces still works.
esttab D1 D2 D3 D4 D5 D6 D7 D8 D9 D10 D11 D12 D13 D14 D15 using "$result_dir/app_A_f8_coefficients.tex", ///
  legend sty(fixed) stats(N r2 DepVarMean, fmt(%16.0gc 3 3) label("N" "\$R^2$")) ///
  star(* 0.10 ** 0.05 *** 0.01) label replace keep(race_black) coeflabel(race_black ///
  "Fraction Black") cells(b(star fmt(2)) se(par fmt(2))) mtitle(Nov1 Nov2 Nov3 Nov4 Nov5 Nov6 ///
  Nov7 Nov8 Nov9 Nov10 Nov11 Nov12 Nov13 Nov14 Nov15)
* Drop the stored estimates now that the table has been written
est clear

* Construct dataset from matrix outputs: one row per day, P = 1..15
clear
set obs 15
gen P=_n

* Empty placeholders: coefficient and 95% confidence bounds, plus voter counts.
* likelyvoters_v2 is created here but is not filled in this file.
foreach stat in coeff cil ciu {
    gen race_black_P_`stat' = .
}
foreach v in v1 v2 {
    gen likelyvoters_`v' = .
}

* Copy column i of each result matrix into row P == i; the confidence interval is the
* coefficient plus or minus 1.96 standard errors
forvalues i = 1/15 {
    replace race_black_P_coeff = placebo_b[1,`i'] if P == `i'
    replace race_black_P_cil = placebo_b[1,`i'] - 1.96*placebo_se[1,`i'] ///
      if P == `i'
    replace race_black_P_ciu = placebo_b[1,`i'] + 1.96*placebo_se[1,`i'] ///
      if P == `i'
    replace likelyvoters_v1 = placebo_v1[1,`i'] if P == `i'
}
* Path quoted so that a data directory containing spaces still works
save "$data_dir/app_A_f8_output.dta", replace

* Reload the per-day output dataset (path quoted so that directories with spaces work)
use "$data_dir/app_A_f8_output.dta", clear

* Figure 1c: Voter Volume by Day (All Filters Applied)
* separate splits the counts into likelyvoters_v11 (P == 8) and likelyvoters_v10
* (all other days) so the two groups of bars can be colored differently
separate likelyvoters_v1, by(P == 8)
sum likelyvoters_v1 if P == 8
local n1 = r(mean)
* Both annotations are anchored at the same height; the second one begins with a blank
* line, presumably so that the count is drawn beneath the "Election Day" label - confirm
local he1 = `n1' + 8000
local he2 = `n1' + 8000 
local dispn1: di %16.0fc `n1'
tw (bar likelyvoters_v11 P, barwidth(1) fcolor(vermillion) fintensity(50) lcolor(vermillion) ///
  legend(off)) (bar likelyvoters_v10 P, barwidth(1) fcolor(gray) fintensity(50) lcolor(gray) ///
  legend(off)), $white title("(c) Identified Voters (Filtered Sample)") ///
  ylab(0 "0" 10000 "10" 20000 "20" 30000 "30" 40000 "40" 50000 "50" 60000 "60" 70000 "70" 80000 ///
  "80" 90000 "90" 100000 "100" 110000 "110" 120000 "120" 130000 "130" 140000 "140" 150000 "150" ///
  160000 "160", nogrid valuelabel labsize(medium) format(%16.0gc) ///
  angle(horizontal)) ytitle("Number of Voters (Thousands)", size(large)) ///
  text(`he1' 8.55 "`=ustrunescape("\u2190")' Election Day", place(se) size(vlarge)) ///
  text(`he2' 8.55 " ""`dispn1'""", place(se) size(vlarge)) ///
  text(35000 4 "Placebo Days", size(vlarge)) ///
  text(35000 12 "Placebo Days", size(vlarge)) ///
  xlabel(1(1)15, labsize(medium)) ///
  xtitle("Date in November 2016", size(large)) saving("$result_dir/f1c_filtered.gph", replace)
graph export "$result_dir/f1c_filtered.png",  replace
 
* Plot preferred specification coefficients across each day's regression
* For the three labeled days, read the coefficient, compute the heights of its two text
* labels (name above, value below) and format the value to two decimals.

* P == 8: labeled "Election Day"
sum race_black_P_coeff if P == 8
local n1 = r(mean)
local h1 = `n1' + 2.6
local h2 = `n1' + 1.2
local dispn1: di %16.2fc `n1'

* P == 1: labeled "Tuesday Before"
sum race_black_P_coeff if P == 1
local n2 = r(mean)
local hl1 = `n2' + 3.4
local hl2 = `n2' + 2
local dispn2: di %16.2fc `n2'

* P == 15: labeled "Tuesday After"
sum race_black_P_coeff if P == 15
local n3 = r(mean)
local hu1 = `n3' + 4
local hu2 = `n3' + 2.6
local dispn3: di %16.2fc `n3'

* Point estimates with capped confidence-interval bars.
* NOTE(review): c(1 2) and lstyle(solid) are kept exactly as written; it is unclear
* whether connect(l) and lpattern(solid) were intended - confirm against the figure.
* The saving() and graph export paths are quoted so that a results directory containing
* spaces still works.
tw (scatter race_black_P_coeff P, c(1 2) mcolor(blue) lcolor(ltblue) ///
  msize(small) lstyle(solid)) (rcap race_black_P_cil race_black_P_ciu P, ///
  lcolor(ltblue) legend(off)), $white title("All Days (Preferred Specification)") ///
  ylab(-24(1)8, nogrid valuelabel labsize(small) format(%16.0gc) angle(horizontal)) ///
  xlab(1(1)15, valuelabel labsize(small) angle(horizontal)) yline(0, lcolor(gs12)) ///
  text(`h1' 8.05 "Election Day", place(n)) ///
  text(`h2' 7.5 "`dispn1'", place(n)) ///
  text(`hl1' 2.08 "Tuesday Before", place(n)) ///
  text(`hl2' .88 "`dispn2'", place(n)) ///
  text(`hu1' 14.3 "Tuesday After", place(n)) ///
  text(`hu2' 14.4 "`dispn3'", place(n)) ///
  ytitle("Coefficients on 'Fraction Black' from Separate Regressions") ///
  xtitle("Date in November 2016") ///
  saving("$result_dir/app_A_f8_placebo_coef.gph", replace)
graph export "$result_dir/app_A_f8_placebo_coef.png", replace

* Close the log opened at the top of this program
log close

* NOTE(review): "stop" does not appear to be a built-in Stata command, so this line
* presumably halts execution with an unrecognized-command error. Confirm that this is
* intentional, in particular if this do-file is called from a master do-file.
stop

